{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "a35c28c0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8ff4e829dc0948a0874ad2799ed9bc75",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import os\n",
    "\n",
    "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n",
    "\n",
    "from dataclasses import dataclass, field\n",
    "from typing import Optional\n",
    "import contextlib\n",
    "\n",
    "import torch\n",
    "from datasets import load_dataset\n",
    "from peft import LoraConfig\n",
    "from transformers import (\n",
    "    AutoModelForCausalLM,\n",
    "    AutoTokenizer,\n",
    "    BitsAndBytesConfig,\n",
    "    HfArgumentParser,\n",
    "    AutoTokenizer,\n",
    "    TrainingArguments,\n",
    ")\n",
    "from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model, PeftModel\n",
    "\n",
    "model = \"bigcode/starcoder\"\n",
    "tokenizer = AutoTokenizer.from_pretrained(model, trust_remote_code=True)\n",
    "model = AutoModelForCausalLM.from_pretrained(\n",
    "    model,\n",
    "    quantization_config=None,\n",
    "    device_map=None,\n",
    "    trust_remote_code=True,\n",
    "    torch_dtype=torch.bfloat16,\n",
    ")\n",
    "\n",
    "# model = model.merge_and_unload()\n",
    "if not hasattr(model, \"hf_device_map\"):\n",
    "    model.cuda()\n",
    "\n",
    "model_id = \"smangrul/peft-lora-starcoder15B-v2-personal-copilot-A100-40GB-colab\"\n",
    "model = PeftModel.from_pretrained(model, model_id, adapter_name=\"personal_copilot\")\n",
    "model.add_weighted_adapter([\"personal_copilot\"], [0.8], \"best_personal_copilot\")\n",
    "model.set_adapter(\"best_personal_copilot\")\n",
    "\n",
    "\n",
    "def get_code_completion(prefix, suffix):\n",
    "    text = prompt = f\"\"\"<fim_prefix>{prefix}<fim_suffix>{suffix}<fim_middle>\"\"\"\n",
    "    model.eval()\n",
    "    outputs = model.generate(\n",
    "        input_ids=tokenizer(text, return_tensors=\"pt\").input_ids.cuda(),\n",
    "        max_new_tokens=128,\n",
    "        temperature=0.2,\n",
    "        top_k=50,\n",
    "        top_p=0.95,\n",
    "        do_sample=True,\n",
    "        repetition_penalty=1.0,\n",
    "    )\n",
    "    return tokenizer.batch_decode(outputs, skip_special_tokens=False)[0]\n"
   ]
  },
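  {
   "cell_type": "markdown",
   "id": "1f2e3d4c",
   "metadata": {},
   "source": [
    "Every `generate` call below logs a warning because only bare `input_ids` are passed, with no `attention_mask` and no explicit `pad_token_id`. A minimal sketch of a variant that forwards both; the helper name `get_code_completion_masked` is ours, not part of the original notebook:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2a3b4c5d",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_code_completion_masked(prefix, suffix):\n",
    "    # Same FIM prompt as above, but pass the full tokenizer output so that\n",
    "    # `generate` receives an attention_mask, and set pad_token_id explicitly\n",
    "    # to silence the open-end generation warning.\n",
    "    text = f\"\"\"<fim_prefix>{prefix}<fim_suffix>{suffix}<fim_middle>\"\"\"\n",
    "    model.eval()\n",
    "    inputs = tokenizer(text, return_tensors=\"pt\").to(\"cuda\")\n",
    "    outputs = model.generate(\n",
    "        **inputs,\n",
    "        max_new_tokens=128,\n",
    "        temperature=0.2,\n",
    "        top_k=50,\n",
    "        top_p=0.95,\n",
    "        do_sample=True,\n",
    "        repetition_penalty=1.0,\n",
    "        pad_token_id=tokenizer.eos_token_id,\n",
    "    )\n",
    "    return tokenizer.batch_decode(outputs, skip_special_tokens=False)[0]\n"
   ]
  },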
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "bd6961ee",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
      "Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<fim_prefix>from accelerate import Accelerator\n",
      "\n",
      "accelerator = Accelerator()\n",
      "\n",
      "model, optimizer, training_dataloader, scheduler = <fim_suffix><fim_middle>accelerator.prepare(\n",
      "    model, optimizer, training_dataloader, scheduler\n",
      ")\n",
      "```\n",
      "\n",
      "## Distributed Training\n",
      "\n",
      "To use distributed training, pass the `device_placement=True` argument to [`Accelerator()`](https://huggingface.co/docs/accelerate/package_reference/accelerator#accelerate.Accelerator).\n",
      "\n",
      "```py\n",
      "from accelerate import Accelerator\n",
      "\n",
      "accelerator = Accelerator(device_placement=True)\n",
      "\n",
      "model, optimizer, training_dataloader, scheduler = accelerator.prepare(\n",
      "    model, optimizer, training_dataloader, scheduler\n",
      ")\n"
     ]
    }
   ],
   "source": [
    "prefix = \"\"\"from accelerate import Accelerator\n",
    "\n",
    "accelerator = Accelerator()\n",
    "\n",
    "model, optimizer, training_dataloader, scheduler = \"\"\"\n",
    "\n",
    "suffix = \"\"\"\"\"\"\n",
    "print(get_code_completion(prefix, suffix))"
   ]
  },
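  {
   "cell_type": "markdown",
   "id": "6e7f8a9b",
   "metadata": {},
   "source": [
    "`get_code_completion` returns the full decoded sequence, so the FIM sentinels and the `<|endoftext|>` token are echoed back along with the completion. A small hypothetical helper to keep only the infilled middle:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7c8d9e0f",
   "metadata": {},
   "outputs": [],
   "source": [
    "def extract_middle(completion):\n",
    "    # Keep only the text generated after the <fim_middle> sentinel and\n",
    "    # drop the end-of-text token, leaving just the infilled code.\n",
    "    middle = completion.split(\"<fim_middle>\", 1)[-1]\n",
    "    return middle.replace(\"<|endoftext|>\", \"\")\n"
   ]
  },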
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "ffc431aa",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
      "Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<fim_prefix>from peft import LoraConfig, TaskType, get_peft_model\n",
      "from transformers import AutoModelForCausalLM\n",
      "\n",
      "peft_config = LoraConfig(<fim_suffix>)<fim_middle>\n",
      "    task_type=TaskType.CAUSAL_LM, inference_mode=False, r=8, lora_alpha=32, lora_dropout=0.1\n",
      ")\n",
      "model = AutoModelForCausalLM.from_pretrained(\"gpt2\")\n",
      "model = get_peft_model(model, peft_config<|endoftext|>\n"
     ]
    }
   ],
   "source": [
    "prefix = \"\"\"\\\n",
    "from peft import LoraConfig, TaskType, get_peft_model\n",
    "from transformers import AutoModelForCausalLM\n",
    "\n",
    "peft_config = LoraConfig(\"\"\"\n",
    "\n",
    "suffix = \")\"\n",
    "print(get_code_completion(prefix, suffix))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "6fca87c6",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
      "Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<fim_prefix>\n",
      "# Here is the correct implementation of the two sum code exercise\n",
      "# time complexity: O(N)\n",
      "# space complexity: O(N)\n",
      "def two_sum(arr, target_sum):\n",
      "<fim_suffix><fim_middle>    # create a hash table\n",
      "    hash_table = {}\n",
      "    for i, num in enumerate(arr):\n",
      "        if target_sum - num in hash_table:\n",
      "            return [hash_table[target_sum - num], i]\n",
      "        hash_table[num] = i\n",
      "    return []\n",
      "<|endoftext|>\n"
     ]
    }
   ],
   "source": [
    "prefix = \"\"\"\n",
    "# Here is the correct implementation of the two sum code exercise\n",
    "# time complexity: O(N)\n",
    "# space complexity: O(N)\n",
    "def two_sum(arr, target_sum):\n",
    "\"\"\"\n",
    "\n",
    "suffix = \"\"\"\"\"\"\n",
    "print(get_code_completion(prefix, suffix))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "7f9ec434",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
      "Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<fim_prefix>import math\n",
      "import re\n",
      "import warnings\n",
      "from dataclasses import asdict, dataclass, field, replace\n",
      "from enum import Enum\n",
      "from typing import List, Optional, Tuple, Union\n",
      "\n",
      "import torch\n",
      "import torch.nn as nn\n",
      "import torch.nn.functional as F\n",
      "from tqdm import tqdm\n",
      "from transformers.pytorch_utils import Conv1D\n",
      "\n",
      "from..config import PeftConfig\n",
      "from..import_utils import is_bnb_4bit_available, is_bnb_available\n",
      "from..utils import (\n",
      "    CLAMP_QUANTILE,\n",
      "    COMMON_LAYERS_PATTERN,\n",
      "    TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING,\n",
      "    ModulesToSaveWrapper,\n",
      "    PeftType,\n",
      "    _freeze_adapter,\n",
      "    _get_submodules,\n",
      "    transpose,\n",
      ")\n",
      "from.tuners_utils import BaseTuner, BaseTunerLayer\n",
      "\n",
      "@dataclass\n",
      "class BottleneckAdapterConfig(PeftConfig):\n",
      "    \"\"\"\n",
      "    <fim_suffix>\n",
      "    \"\"\" <fim_middle>Config for [`BottleneckAdapter`].\n",
      "\n",
      "    Args:\n",
      "        in_features (`int`): The number of input features.\n",
      "        out_features (`int`): The number of output features.\n",
      "        hidden_features (`int`): The number of hidden features.\n",
      "        num_attention_heads (`int`): The number of attention heads.\n",
      "        attention_head_dim (`int`): The dimension of each attention head.\n",
      "        bias (`bool`, *optional*, defaults to `True`):\n",
      "            Whether or not to add a bias to the output.\n",
      "        modules_to_save (`List[str]`, *optional*):\n",
      "            The list of modules to\n"
     ]
    }
   ],
   "source": [
    "prefix = \"\"\"import math\n",
    "import re\n",
    "import warnings\n",
    "from dataclasses import asdict, dataclass, field, replace\n",
    "from enum import Enum\n",
    "from typing import List, Optional, Tuple, Union\n",
    "\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.nn.functional as F\n",
    "from tqdm import tqdm\n",
    "from transformers.pytorch_utils import Conv1D\n",
    "\n",
    "from ..config import PeftConfig\n",
    "from ..import_utils import is_bnb_4bit_available, is_bnb_available\n",
    "from ..utils import (\n",
    "    CLAMP_QUANTILE,\n",
    "    COMMON_LAYERS_PATTERN,\n",
    "    TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING,\n",
    "    ModulesToSaveWrapper,\n",
    "    PeftType,\n",
    "    _freeze_adapter,\n",
    "    _get_submodules,\n",
    "    transpose,\n",
    ")\n",
    "from .tuners_utils import BaseTuner, BaseTunerLayer\n",
    "\n",
    "@dataclass\n",
    "class BottleneckAdapterConfig(PeftConfig):\n",
    "    \\\"\"\"\n",
    "    \"\"\"\n",
    "\n",
    "suffix = \"\"\"\n",
    "    \\\"\"\" \\\n",
    "\"\"\"\n",
    "\n",
    "print(get_code_completion(prefix, suffix))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
